{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据分组"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 分组键是列名  \n",
    "分组键是列名时将某一列或者多列的列名传给groupby()方法，groupby()方法会按照这一列或多列进行分组。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**按照一列进行分组**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户ID</th>\n",
       "      <th>客户分类</th>\n",
       "      <th>区域</th>\n",
       "      <th>是否省会</th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "      <th>9月销量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>59224</td>\n",
       "      <td>A类</td>\n",
       "      <td>一线城市</td>\n",
       "      <td>是</td>\n",
       "      <td>6</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>55295</td>\n",
       "      <td>B类</td>\n",
       "      <td>三线城市</td>\n",
       "      <td>否</td>\n",
       "      <td>37</td>\n",
       "      <td>27</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>46035</td>\n",
       "      <td>A类</td>\n",
       "      <td>二线城市</td>\n",
       "      <td>是</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2459</td>\n",
       "      <td>C类</td>\n",
       "      <td>一线城市</td>\n",
       "      <td>是</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2217</td>\n",
       "      <td>B类</td>\n",
       "      <td>三线城市</td>\n",
       "      <td>否</td>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>22557</td>\n",
       "      <td>A类</td>\n",
       "      <td>二线城市</td>\n",
       "      <td>否</td>\n",
       "      <td>42</td>\n",
       "      <td>20</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    用户ID 客户分类    区域 是否省会  7月销量  8月销量  9月销量\n",
       "0  59224   A类  一线城市    是     6    20     0\n",
       "1  55295   B类  三线城市    否    37    27    35\n",
       "2  46035   A类  二线城市    是     8     1     8\n",
       "3   2459   C类  一线城市    是     7     8    14\n",
       "4   2217   B类  三线城市    否     9    12     4\n",
       "5  22557   A类  二线城市    否    42    20    55"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_excel(r\"../Data/Chapter10.xlsx\",sheet_name =0)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">7月销量</th>\n",
       "      <th colspan=\"2\" halign=\"left\">8月销量</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"2\" halign=\"left\">9月销量</th>\n",
       "      <th colspan=\"8\" halign=\"left\">用户ID</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>...</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>3.0</td>\n",
       "      <td>18.666667</td>\n",
       "      <td>20.231988</td>\n",
       "      <td>6.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>13.666667</td>\n",
       "      <td>...</td>\n",
       "      <td>31.50</td>\n",
       "      <td>55.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>42605.333333</td>\n",
       "      <td>18572.538392</td>\n",
       "      <td>22557.0</td>\n",
       "      <td>34296.0</td>\n",
       "      <td>46035.0</td>\n",
       "      <td>52629.5</td>\n",
       "      <td>59224.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>2.0</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>19.798990</td>\n",
       "      <td>9.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>30.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>19.500000</td>\n",
       "      <td>...</td>\n",
       "      <td>27.25</td>\n",
       "      <td>35.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>28756.000000</td>\n",
       "      <td>37531.813732</td>\n",
       "      <td>2217.0</td>\n",
       "      <td>15486.5</td>\n",
       "      <td>28756.0</td>\n",
       "      <td>42025.5</td>\n",
       "      <td>55295.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>1.0</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>14.00</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2459.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2459.0</td>\n",
       "      <td>2459.0</td>\n",
       "      <td>2459.0</td>\n",
       "      <td>2459.0</td>\n",
       "      <td>2459.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      7月销量                                                     8月销量  \\\n",
       "     count       mean        std  min   25%   50%   75%   max count   \n",
       "客户分类                                                                  \n",
       "A类     3.0  18.666667  20.231988  6.0   7.0   8.0  25.0  42.0   3.0   \n",
       "B类     2.0  23.000000  19.798990  9.0  16.0  23.0  30.0  37.0   2.0   \n",
       "C类     1.0   7.000000        NaN  7.0   7.0   7.0   7.0   7.0   1.0   \n",
       "\n",
       "                  ...      9月销量        用户ID                              \\\n",
       "           mean   ...       75%   max count          mean           std   \n",
       "客户分类              ...                                                     \n",
       "A类    13.666667   ...     31.50  55.0   3.0  42605.333333  18572.538392   \n",
       "B类    19.500000   ...     27.25  35.0   2.0  28756.000000  37531.813732   \n",
       "C类     8.000000   ...     14.00  14.0   1.0   2459.000000           NaN   \n",
       "\n",
       "                                                   \n",
       "          min      25%      50%      75%      max  \n",
       "客户分类                                               \n",
       "A类    22557.0  34296.0  46035.0  52629.5  59224.0  \n",
       "B类     2217.0  15486.5  28756.0  42025.5  55295.0  \n",
       "C类     2459.0   2459.0   2459.0   2459.0   2459.0  \n",
       "\n",
       "[3 rows x 32 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"客户分类\").count()\n",
    "g=df.groupby(df[\"客户分类\"])\n",
    "g.mean()\n",
    "g.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户ID</th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "      <th>9月销量</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>127816</td>\n",
       "      <td>56</td>\n",
       "      <td>41</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>57512</td>\n",
       "      <td>46</td>\n",
       "      <td>39</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>2459</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        用户ID  7月销量  8月销量  9月销量\n",
       "客户分类                          \n",
       "A类    127816    56    41    63\n",
       "B类     57512    46    39    39\n",
       "C类      2459     7     8    14"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"客户分类\").sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**按照多列进行分组**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>用户ID</th>\n",
       "      <th>是否省会</th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "      <th>9月销量</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th>区域</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">A类</th>\n",
       "      <th>一线城市</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>二线城市</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <th>三线城市</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <th>一线城市</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           用户ID  是否省会  7月销量  8月销量  9月销量\n",
       "客户分类 区域                                \n",
       "A类   一线城市     1     1     1     1     1\n",
       "     二线城市     2     2     2     2     2\n",
       "B类   三线城市     2     2     2     2     2\n",
       "C类   一线城市     1     1     1     1     1"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby([\"客户分类\",\"区域\"]).count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户分类</th>\n",
       "      <th>区域</th>\n",
       "      <th>用户ID</th>\n",
       "      <th>是否省会</th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "      <th>9月销量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A类</td>\n",
       "      <td>一线城市</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A类</td>\n",
       "      <td>二线城市</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>B类</td>\n",
       "      <td>三线城市</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>C类</td>\n",
       "      <td>一线城市</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  客户分类    区域  用户ID  是否省会  7月销量  8月销量  9月销量\n",
       "0   A类  一线城市     1     1     1     1     1\n",
       "1   A类  二线城市     2     2     2     2     2\n",
       "2   B类  三线城市     2     2     2     2     2\n",
       "3   C类  一线城市     1     1     1     1     1"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby([\"客户分类\",\"区域\"]).count().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>用户ID</th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "      <th>9月销量</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th>区域</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">A类</th>\n",
       "      <th>一线城市</th>\n",
       "      <td>59224</td>\n",
       "      <td>6</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>二线城市</th>\n",
       "      <td>68592</td>\n",
       "      <td>50</td>\n",
       "      <td>21</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <th>三线城市</th>\n",
       "      <td>57512</td>\n",
       "      <td>46</td>\n",
       "      <td>39</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <th>一线城市</th>\n",
       "      <td>2459</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            用户ID  7月销量  8月销量  9月销量\n",
       "客户分类 区域                           \n",
       "A类   一线城市  59224     6    20     0\n",
       "     二线城市  68592    50    21    63\n",
       "B类   三线城市  57512    46    39    39\n",
       "C类   一线城市   2459     7     8    14"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby([\"客户分类\",\"区域\"]).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "客户分类\n",
       "A类    3\n",
       "B类    2\n",
       "C类    1\n",
       "Name: 用户ID, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"客户分类\")[\"用户ID\"].count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 分组键是Series"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**按照一个Series进行分组**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户ID</th>\n",
       "      <th>区域</th>\n",
       "      <th>是否省会</th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "      <th>9月销量</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      用户ID  区域  是否省会  7月销量  8月销量  9月销量\n",
       "客户分类                                  \n",
       "A类       3   3     3     3     3     3\n",
       "B类       2   2     2     2     2     2\n",
       "C类       1   1     1     1     1     1"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"客户分类\"]\n",
    "df.groupby(df[\"客户分类\"]).count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**按照多个Series进行分组**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "      <th>9月销量</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th>用户ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">A类</th>\n",
       "      <th>22557</th>\n",
       "      <td>42</td>\n",
       "      <td>20</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46035</th>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59224</th>\n",
       "      <td>6</td>\n",
       "      <td>20</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">B类</th>\n",
       "      <th>2217</th>\n",
       "      <td>9</td>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55295</th>\n",
       "      <td>37</td>\n",
       "      <td>27</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <th>2459</th>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            7月销量  8月销量  9月销量\n",
       "客户分类 用户ID                   \n",
       "A类   22557    42    20    55\n",
       "     46035     8     1     8\n",
       "     59224     6    20     0\n",
       "B类   2217      9    12     4\n",
       "     55295    37    27    35\n",
       "C类   2459      7     8    14"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby([df[\"客户分类\"],df[\"用户ID\"]]).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "客户分类\n",
       "A类    3\n",
       "B类    2\n",
       "C类    1\n",
       "Name: 用户ID, dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(df[\"客户分类\"])[\"用户ID\"].count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## aggregate方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">用户ID</th>\n",
       "      <th colspan=\"2\" halign=\"left\">区域</th>\n",
       "      <th colspan=\"2\" halign=\"left\">是否省会</th>\n",
       "      <th colspan=\"2\" halign=\"left\">7月销量</th>\n",
       "      <th colspan=\"2\" halign=\"left\">8月销量</th>\n",
       "      <th colspan=\"2\" halign=\"left\">9月销量</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "      <th>sum</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>3</td>\n",
       "      <td>127816</td>\n",
       "      <td>3</td>\n",
       "      <td>一线城市二线城市二线城市</td>\n",
       "      <td>3</td>\n",
       "      <td>是是否</td>\n",
       "      <td>3</td>\n",
       "      <td>56</td>\n",
       "      <td>3</td>\n",
       "      <td>41</td>\n",
       "      <td>3</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>2</td>\n",
       "      <td>57512</td>\n",
       "      <td>2</td>\n",
       "      <td>三线城市三线城市</td>\n",
       "      <td>2</td>\n",
       "      <td>否否</td>\n",
       "      <td>2</td>\n",
       "      <td>46</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "      <td>2</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>1</td>\n",
       "      <td>2459</td>\n",
       "      <td>1</td>\n",
       "      <td>一线城市</td>\n",
       "      <td>1</td>\n",
       "      <td>是</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      用户ID            区域                是否省会       7月销量      8月销量      9月销量  \\\n",
       "     count     sum count           sum count  sum count sum count sum count   \n",
       "客户分类                                                                          \n",
       "A类       3  127816     3  一线城市二线城市二线城市     3  是是否     3  56     3  41     3   \n",
       "B类       2   57512     2      三线城市三线城市     2   否否     2  46     2  39     2   \n",
       "C类       1    2459     1          一线城市     1    是     1   7     1   8     1   \n",
       "\n",
       "          \n",
       "     sum  \n",
       "客户分类      \n",
       "A类    63  \n",
       "B类    39  \n",
       "C类    14  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"客户分类\").aggregate([\"count\",\"sum\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户ID</th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>3</td>\n",
       "      <td>56</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>2</td>\n",
       "      <td>46</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      用户ID  7月销量  8月销量\n",
       "客户分类                  \n",
       "A类       3    56    41\n",
       "B类       2    46    39\n",
       "C类       1     7     8"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"客户分类\").aggregate({\"用户ID\":\"count\",\"7月销量\":\"sum\",\"8月销量\":\"sum\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户ID</th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "      <th>9月销量</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>127816</td>\n",
       "      <td>56</td>\n",
       "      <td>41</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>57512</td>\n",
       "      <td>46</td>\n",
       "      <td>39</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>2459</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        用户ID  7月销量  8月销量  9月销量\n",
       "客户分类                          \n",
       "A类    127816    56    41    63\n",
       "B类     57512    46    39    39\n",
       "C类      2459     7     8    14"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(\"客户分类\").sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 对分组后的结果进行重置索引"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户分类</th>\n",
       "      <th>用户ID</th>\n",
       "      <th>7月销量</th>\n",
       "      <th>8月销量</th>\n",
       "      <th>9月销量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A类</td>\n",
       "      <td>127816</td>\n",
       "      <td>56</td>\n",
       "      <td>41</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B类</td>\n",
       "      <td>57512</td>\n",
       "      <td>46</td>\n",
       "      <td>39</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C类</td>\n",
       "      <td>2459</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  客户分类    用户ID  7月销量  8月销量  9月销量\n",
       "0   A类  127816    56    41    63\n",
       "1   B类   57512    46    39    39\n",
       "2   C类    2459     7     8    14"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#利用reset_index()方法重置索引\n",
    "df.groupby(\"客户分类\").sum().reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据透析   \n",
    "Python中数据透析表用pivot_table()方法，参数如下：  \n",
    "pd.pivot_table(data, values = None, index = None, columns = None, aggfunc = 'mean', fill_value = None, magrins = False, dropna = False, margins_name = 'All')  \n",
    "- data表示要做数据透析表的整个表 \n",
    "- values对应Excel中的值的那个框\n",
    "- index对应Excel中行的那个框\n",
    "- columns对应Excel中列的那个框\n",
    "- aggfunc表示values的计算类型\n",
    "- fill_value表示对空值的填充\n",
    "- margins 表示是否显示合并的列\n",
    "- dropna表似乎删除缺失，如果为真时，则把一整行全部作为缺失值删除\n",
    "- margins_name表示合并的列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>区域</th>\n",
       "      <th>一线城市</th>\n",
       "      <th>三线城市</th>\n",
       "      <th>二线城市</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "区域    一线城市  三线城市  二线城市\n",
       "客户分类                  \n",
       "A类     1.0   NaN   2.0\n",
       "B类     NaN   2.0   NaN\n",
       "C类     1.0   NaN   NaN"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_excel(r\"../Data/Chapter10.xlsx\",sheet_name =0)\n",
    "pd.pivot_table(df,values = \"用户ID\",columns =\"区域\",index=\"客户分类\",aggfunc=\"count\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>区域</th>\n",
       "      <th>一线城市</th>\n",
       "      <th>三线城市</th>\n",
       "      <th>二线城市</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "区域    一线城市  三线城市  二线城市  All\n",
       "客户分类                       \n",
       "A类     1.0   NaN   2.0    3\n",
       "B类     NaN   2.0   NaN    2\n",
       "C类     1.0   NaN   NaN    1\n",
       "All    2.0   2.0   2.0    6"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot_table(df,values = \"用户ID\",columns =\"区域\",index=\"客户分类\",aggfunc=\"count\",margins = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>区域</th>\n",
       "      <th>一线城市</th>\n",
       "      <th>三线城市</th>\n",
       "      <th>二线城市</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "区域    一线城市  三线城市  二线城市  All\n",
       "客户分类                       \n",
       "A类       1     0     2    3\n",
       "B类       0     2     0    2\n",
       "C类       1     0     0    1\n",
       "All      2     2     2    6"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot_table(df,values = \"用户ID\",columns =\"区域\",index=\"客户分类\",aggfunc=\"count\",margins = True,fill_value =0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">7月销量</th>\n",
       "      <th colspan=\"3\" halign=\"left\">用户ID</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>区域</th>\n",
       "      <th>一线城市</th>\n",
       "      <th>三线城市</th>\n",
       "      <th>二线城市</th>\n",
       "      <th>一线城市</th>\n",
       "      <th>三线城市</th>\n",
       "      <th>二线城市</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>50.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>NaN</td>\n",
       "      <td>46.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     7月销量             用户ID          \n",
       "区域   一线城市  三线城市  二线城市 一线城市 三线城市 二线城市\n",
       "客户分类                                \n",
       "A类    6.0   NaN  50.0  1.0  NaN  2.0\n",
       "B类    NaN  46.0   NaN  NaN  2.0  NaN\n",
       "C类    7.0   NaN   NaN  1.0  NaN  NaN"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot_table(df,values = [\"用户ID\",\"7月销量\"],columns=\"区域\",index=\"客户分类\",aggfunc={\"用户ID\":\"count\",\"7月销量\":\"sum\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>区域</th>\n",
       "      <th>一线城市</th>\n",
       "      <th>三线城市</th>\n",
       "      <th>二线城市</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>客户分类</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>A类</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B类</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C类</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "区域    一线城市  三线城市  二线城市\n",
       "客户分类                  \n",
       "A类     1.0   NaN   2.0\n",
       "B类     NaN   2.0   NaN\n",
       "C类     1.0   NaN   NaN"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot_table(df,values=\"用户ID\",columns=\"区域\",index=\"客户分类\",aggfunc=\"count\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>区域</th>\n",
       "      <th>客户分类</th>\n",
       "      <th>一线城市</th>\n",
       "      <th>三线城市</th>\n",
       "      <th>二线城市</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A类</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B类</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C类</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "区域 客户分类  一线城市  三线城市  二线城市\n",
       "0    A类   1.0   NaN   2.0\n",
       "1    B类   NaN   2.0   NaN\n",
       "2    C类   1.0   NaN   NaN"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot_table(df,values=\"用户ID\",columns=\"区域\",index=\"客户分类\",aggfunc=\"count\").reset_index()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "第10章 数据分组 数据透视表",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
